********************************************************************************
**	PURPOSE: Summary stats and balance tables; creates T1, AT3a, AT3b
**							
**	INPUTS:  Wide with outcomes.dta
**	
**	OUTPUTS: Balance Main.xls - Table 1 
**			 Balance Index and Components.xls - X 
**			 Balance Highest Tercile.xls - Appendix Table 3a
**			 Balance Lowest Tercile.xls - Appendix Table 3b 
**				
**  CREATED/EDITED BY: Nora Gregory, Kayla Wilding, Leah Kim, Hasan Ahamed
**
**	DATE CREATED: 07/25/16
**
**	DATE LAST EDITED: 2/28/2023
********************************************************************************

/*******************************************************************************
0. Setup
   Defines, for each of the four tables produced in section 1 (indexed 1-4),
   the output file stem and the 0/1 variable that selects the sample, plus the
   display precision and the lists of variables that are tested for balance.
********************************************************************************/
clear all

* Table 1: main balance table
local filename1 "Balance Main"
local sample1 flag_randomized

* Table 2: index and index components
local filename2 "Balance Index and Components"
local sample2 flag_randomized

* Table 3: sample flagged by htca
local filename3 "Balance Highest Tercile"
local sample3 htca

* Table 4: sample flagged by ltca
local filename4 "Balance Lowest Tercile"
local sample4 ltca

* Number of decimals used when statistics are converted to strings.
* NOTE(review): pdecs is not referenced anywhere else in this file; presumably
* it was meant for the p-value column - confirm.
local decs 3
local pdecs 3

* Main list of baseline variables (used for tables 1, 3 and 4)
local testvars age1 female1 married1 adults1 child1 race_black1 college1 z_risk_i1
local testvars `testvars' z_contknow_i1 z_liqcf_i1 z_default_index_all_i1 binindex1 scoredf1 ficoscore081
local testvars `testvars' z_credac_i1 z_credac_rev_i1 prevloans1

* Indices and their components (used for table 2)
local testvars2 z_contknow_i1 revbvafford1 bvtoday1 revbvlongterm1 bvregret1
local testvars2 `testvars2' bvtask1 checkcs1 creport1 creditc1 healthc1 agec1 borrowc1 repayc1 z_liqcf_i1
local testvars2 `testvars2' incgt30 bvstress1 bvbills1 revbvneed revfinsit savingsbal_mehb951 savslccu60more
local testvars2 `testvars2' z_credac_i1 ihs_openinstalltr1 openinstallB1 ihs_inquiry121 ihs_openrevolvtr1
local testvars2 `testvars2' opentradeB1 ihs_usecreditoptrst1

/*******************************************************************************
1. Table 1, Appendix Table 3a and 3b  

   For each table x = 1..4:
     - load the wide data set and keep the observations with `sample`x'' == 1
     - t-test every variable in the table's list by treatment status (enc)
     - lay the results out as two rows per variable (means / difference on the
       first row, standard deviations / standard error in parentheses on the
       second), add significance stars and a p-value column
     - export to "$outputtables/`filename`x''.xls"

   Relies on the locals defined in section 0 and on the globals $adta and
   $outputtables, which are not defined in this file.
********************************************************************************/

forval x = 1/4 {
	* table 2 uses the index/components list, every other table the main list
	if `x' == 2 {
		local testvars`x' `testvars2'
	}
	else {
		local testvars`x' `testvars'
	}
	cap log close

	use "$adta/Wide with outcomes.dta", clear	
	
	*labeling variables
	la var z_contknow_i1 "Self control and knowledge index"
	la var z_liqcf_i1 "Liquid index"
	la var z_credac_i1 "Installment Credit Access at Baseline Index"
	la var creditc1 "Correct about credit affecting interest"
	la var healthc1 "Correct about health affecting interest"
	la var agec1	"Correct about age affecting interest"
	la var borrowc1 "Correct about borrowing affecting interest"
	la var repayc1 "Correct about repayment time affecting interest"
	la var savslccu60more "1 = more than 60$ savings"
	la var binnoncblslc1 "1 = loan besides cml at baseline"
	* replace: do not fail if the data set already carries a label named treat
	label define treat 0 "Control" 1 "Treatment", replace
	label val enc treat

	*restrict the sample for each table 
	keep if `sample`x'' == 1 

	local ordercount 1
	foreach var in `testvars`x'' {
		*position of the variable in the table
		gen order_c`var' = `ordercount'

		*t-test of the control and treatment means by treatment status
		*(group 1 = enc 0 = control, group 2 = enc 1 = treatment)
		ttest `var', by(enc)
		return list

		*store every r() result before running anything else
		gen mu_t`var' = r(mu_2)
		gen sd_t`var' = r(sd_2)
		gen mu_c`var' = r(mu_1)
		gen sd_c`var' = r(sd_1)
		gen dif_se`var' = r(se)
		gen p_val`var' = r(p)

		*difference in means, already formatted as a string.
		*The means are left untouched here: a displayed "-0.000" is cleaned up
		*after formatting (see below) instead of taking abs() of the control
		*mean, which flipped the sign of means between -0.001 and -0.0005 and
		*distorted the reported difference.
		gen mu_dif`var' = string(mu_t`var' - mu_c`var', "%14.`decs'f")

		*store the label; fall back to the variable name when there is none so
		*that the row is still identified in the output
		local varlab : variable label `var'
		if `"`varlab'"' == "" {
			local varlab `var'
		}
		gen lab_v`var' = `"`varlab'"'

		local ++ordercount
	}

	**The loop above created a data set that is wide by variable; reshape it to
	*long so that each tested variable is a row and each statistic a column
	preserve 
		keep mu_t* mu_c* mu_dif* sd_t* sd_c* dif_se* lab_v* order_c* p_val*
		duplicates drop
		gen i = 1
		reshape long mu_t mu_c mu_dif sd_t sd_c dif_se lab_v order_c p_val, i(i) j(credvar) string

		*format the numeric statistics as strings with `decs' decimals
		foreach stat in mu_t mu_c sd_t sd_c dif_se {
			gen `stat'2 = string(`stat', "%14.`decs'f")
			drop `stat'
			rename `stat'2 `stat'
		}

		*tiny negative values are displayed as -0.000; show them as 0.000.
		*real() of such a string is zero, so only displayed zeros are affected.
		foreach stat in mu_t mu_c mu_dif {
			replace `stat' = subinstr(`stat', "-", "", 1) if real(`stat') == 0
		}

		*parentheses around standard deviations and the standard error
		foreach stat in sd_t sd_c dif_se {
			replace `stat' = "(" + `stat' + ")"
		}

		*two rows per variable: the second one carries the SD / SE
		expand 2
		sort order_c 
		gen byte se_row = (credvar == credvar[_n-1])
		replace mu_t = sd_t if se_row
		replace mu_c = sd_c if se_row
		replace mu_dif = dif_se if se_row
		replace lab_v = "" if se_row

		*add stars for significant values (first row of each variable only)
		replace mu_dif = mu_dif + "***" if p_val < 0.01 & !se_row
		replace mu_dif = mu_dif + "**" if p_val >= 0.01 & p_val < 0.05 & !se_row
		replace mu_dif = mu_dif + "*" if p_val >= 0.05 & p_val < 0.1 & !se_row

		*p-value column, shown on the first row of each variable only.
		*NOTE(review): pdecs is assumed to be the p-value precision; it has the
		*same value as decs in the setup section.
		gen p = string(p_val, "%14.`pdecs'f")
		replace p = "" if se_row

		drop dif_se sd_t sd_c i credvar order_c p_val se_row
		order lab_v mu_c mu_t mu_dif p

		*adding significance and SE notations in two extra rows at the bottom
		loc n1 = _N+1
		loc n2 = _N+2
		set obs `n2'
		replace lab_v = "Standard errors in parentheses" in `n1'
		replace lab_v = "* p<0.1  ** p<0.05  *** p<0.01" in `n2'
		export excel using "$outputtables/`filename`x''.xls", replace first(var)
	restore 

	*remove the helper variables from the restored data
	drop mu_t* mu_c* mu_dif* sd_t* sd_c* dif_se* lab_v* order_c* p_val*

}

*EOF**
